library(data.table)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(heatmaply)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## Loading required package: viridisLite
## 
## ======================
## Welcome to heatmaply version 1.3.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
library(RColorBrewer)
library(vcd)
## Loading required package: grid
load("tf0.rdata")
#load("tf3.rdata")
#load("tf4.rdata")
#load("CX.rdata")
load("tf4.rdata")
par(mfrow=c(1,2),cex=0.7)
table(A0$age, useNA='ifany') %>% barplot(main="Age Groups",las=2) #人數分布在a34、a39、a44 
table(A0$area, useNA='ifany') %>% barplot(main="Areas",las=2)  #地區分布在南港區及汐止區

cats = Z0 %>% 
  group_by(cat) %>% 
  summarise(
    noProd = n_distinct(prod), #取商品唯一ID
    totalQty = sum(qty), #總銷售數量
    totalRev = sum(price), #總收益
    totalGross = sum(price) - sum(cost),#總毛利
    grossMargin = totalGross/totalRev, #邊際毛利
    avgPrice = totalRev/totalQty#平均價格
  )
cats$ID <- as.character(cats$cat)


(g1 <- cats %>%
  top_n(10, totalRev) %>%
  ggplot(aes(x = ID, y = totalRev)) +
  geom_col()) #560102及560402的銷售金額最高

(g2 <- cats %>%
  top_n(10, totalGross) %>%
  ggplot(aes(x = ID, y = totalGross)) +
  geom_col()) #320402、560201、560402的總毛利最高

(g3 <- cats %>%
  top_n(10, avgPrice) %>%
  ggplot(aes(x = ID, y = avgPrice)) +
  geom_col()) #平均價格前10名均落在3000~4000

(g4 <- cats %>%
  top_n(10, grossMargin) %>%
  ggplot(aes(x = ID, y = grossMargin)) +
  geom_col()) #邊際毛利均落在0.4以上

top10_Rev = cats %>% top_n(10,totalRev)
top10_Gross = cats %>% top_n(10,totalGross)
top = merge(top10_Rev,top10_Gross,all = F)

col6 = c('seagreen','gold','orange',rep('red',3))
options(scipen = 999)
g = top %>% ggplot(aes(x=totalRev,y=totalGross,size=totalQty,col=avgPrice)) + geom_point(alpha=0.7)+ geom_text(aes(label=cat,size=0.4),col="black") + scale_size(range=c(5,20)) + scale_color_gradientn(colors=col6) + theme_bw()

ggplotly(g)
# (A0_rfm <- A0 %>%
#   select(cust, r, s, f, m, rev) %>%
#   summarize(cust, r, f, m, rev, avg_f = (s-r) / (f-1)))
# sum(is.na(A0_rfm$avg_f))
# table(A0_rfm$avg_f) %>%barplot
X0$wday = format(X0$date, "%u")
mtx1 = table(X0$age,X0$wday) %>% prop.table(1) 
mtx1 = as.data.frame.matrix(mtx1)
heatmaply(mtx1,Rowv=F,Colv=F)
#年輕人特別喜歡在周末來消費,而老人消費頻率則較為平均
mtx2 = table(X0$area,X0$wday) %>% prop.table(1)
mtx2 = as.data.frame.matrix(mtx2)
heatmaply(mtx2,Rowv=F,Colv=F)
#各地區的人普遍都喜歡在周末消費,其中以信義區和松山區更為明顯
top_10 = cats %>% top_n(10, totalGross) %>% pull(cat)
Z_top10 = Z0 %>% filter(cat %in% top$cat)
Z_top10$wday = format(Z_top10$date, "%u")
Z_top10 = Z_top10 %>% mutate(Gross = price-cost)
a = Z_top10 %>% count(cat,wday,wt=Gross)
xtabs(n~cat+wday,data=a) %>% as.data.frame.matrix %>% heatmaply(Rowv=F,Colv=F)
top10_Rev = cats %>% top_n(10,totalRev)
top10_Gross = cats %>% top_n(10,totalGross)
top = merge(top10_Rev,top10_Gross,all = F)
MOSA = function(formula, data) mosaic(formula, data, shade=T, 
  margins=c(0,1,0,0), labeling_args = list(rot_labels=c(90,0,0,0)),
  gp_labels=gpar(fontsize=9), legend_args=list(fontsize=9),
  gp_text=gpar(fontsize=7),labeling=labeling_residuals)
MOSA(~cat+age, Z0[Z0$cat %in% top$cat,])

A0$cust <- as.numeric(A0$cust)
A0_cluster <- A0 %>%
  select(cust, r, s, f, m)
A0_cluster_scale = scale(A0_cluster[,c(2:5)]) %>% data.frame
sapply(A0_cluster_scale, mean)
##                           r                           s 
##  0.000000000000000020673174  0.000000000000000076871627 
##                           f                           m 
## -0.000000000000000001795684  0.000000000000000031208240
sapply(A0_cluster_scale,sd)
## r s f m 
## 1 1 1 1
d = dist(A0_cluster_scale, method="euclidean")#歐式距離
hc = hclust(d, method='ward.D') #華德法
plot(hc)

kg = cutree(hc, k=7)
table(kg)
## kg
##    1    2    3    4    5    6    7 
## 5884 7549 7439 2262 5310 2863  934
names(A0_cluster_scale) =c(
  "最近消費天數","第一次消費天數","頻率","平均交易金額")
kg1 = kg %>% as.factor() %>% as.data.frame()
colnames(kg1) <- "group"

sapply(split(A0_cluster,kg), colMeans) %>% round(2)  # 原始尺度 
##               1          2          3          4          5          6
## cust 1442167.64 1504745.41 1539985.66 1528642.46 1448635.80 1349386.49
## r         24.63      22.08      88.19      47.67      19.46       8.43
## s         98.30      32.37      93.57      53.50     101.98     109.90
## f          3.77       1.66       1.38       1.32       4.01       9.60
## m        500.61     692.20     736.40    3218.22    1647.82     606.97
##               7
## cust 1280019.29
## r          4.81
## s        116.03
## f         24.16
## m        646.34
sapply(split(A0_cluster_scale,kg), colMeans) %>% round(2)  # 標準尺度
##                    1     2     3     4     5     6     7
## 最近消費天數   -0.38 -0.46  1.51  0.30 -0.53 -0.86 -0.97
## 第一次消費天數  0.51 -1.42  0.38 -0.80  0.62  0.85  1.03
## 頻率            0.02 -0.42 -0.48 -0.49  0.06  1.22  4.24
## 平均交易金額   -0.51 -0.31 -0.27  2.32  0.68 -0.40 -0.36
plot.new() 
c = brewer.pal(12,"Set3")[2:5] # 設定顏色
par(cex=0.8,family="黑體-繁 中黑")
split(A0_cluster_scale,kg) %>% sapply(colMeans) %>% barplot(beside=T,col =c,legend = TRUE, xlim=c(0,45), names.arg = c("生活", "瞌睡", "新顧客", "沉睡", "潛力股", "忠實", "員工"), xlab="分群")
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : font family not found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database

group = A0_cluster %>% cbind(kg1) %>%
  mutate (customer = case_when(group == 1 ~"生活型顧客",
                              group == 2 ~"沉睡型顧客",
                              group == 3 ~"沉睡型顧客",
                              group == 4 ~"節慶型顧客", 
                              group == 5 ~"採購型顧客",
                              group == 6 ~"忠實顧客",
                              group == 7 ~"員工"))
cust_group <- group %>%
  select(cust, group, customer) %>%
  merge(A0, by = "cust")

MOSA(~group+age, data=cust_group)

MOSA(~group+area, data=cust_group)

```